In [1]:
import pandas as pd
import numpy as np
import shutil
import multiprocessing
from datetime import datetime

import tensorflow as tf
from tensorflow.python.feature_column import feature_column
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import make_export_strategy
from tensorflow import data

print(tf.__version__)


/Users/khalidsalama/anaconda/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
1.4.0

TF Custom Estimator to Build a NN Autoencoder for Feature Extraction


In [2]:
MODEL_NAME = 'auto-encoder-01'

TRAIN_DATA_FILES_PATTERN = 'data/data-*.csv'

RESUME_TRAINING = False

MULTI_THREADING = True

1. Define Dataset Metadata


In [3]:
FEATURE_COUNT = 64

HEADER = ['key']
HEADER_DEFAULTS = [[0]]
UNUSED_FEATURE_NAMES = ['key']
CLASS_FEATURE_NAME = 'CLASS'
FEATURE_NAMES = []  

for i in range(FEATURE_COUNT):
    HEADER += ['x_{}'.format(str(i+1))]
    FEATURE_NAMES += ['x_{}'.format(str(i+1))]
    HEADER_DEFAULTS += [[0.0]]

HEADER += [CLASS_FEATURE_NAME]
HEADER_DEFAULTS += [['NA']]

print("Header: {}".format(HEADER))
print("Features: {}".format(FEATURE_NAMES))
print("Class Feature: {}".format(CLASS_FEATURE_NAME))
print("Unused Features: {}".format(UNUSED_FEATURE_NAMES))


Header: ['key', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9', 'x_10', 'x_11', 'x_12', 'x_13', 'x_14', 'x_15', 'x_16', 'x_17', 'x_18', 'x_19', 'x_20', 'x_21', 'x_22', 'x_23', 'x_24', 'x_25', 'x_26', 'x_27', 'x_28', 'x_29', 'x_30', 'x_31', 'x_32', 'x_33', 'x_34', 'x_35', 'x_36', 'x_37', 'x_38', 'x_39', 'x_40', 'x_41', 'x_42', 'x_43', 'x_44', 'x_45', 'x_46', 'x_47', 'x_48', 'x_49', 'x_50', 'x_51', 'x_52', 'x_53', 'x_54', 'x_55', 'x_56', 'x_57', 'x_58', 'x_59', 'x_60', 'x_61', 'x_62', 'x_63', 'x_64', 'CLASS']
Features: ['x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9', 'x_10', 'x_11', 'x_12', 'x_13', 'x_14', 'x_15', 'x_16', 'x_17', 'x_18', 'x_19', 'x_20', 'x_21', 'x_22', 'x_23', 'x_24', 'x_25', 'x_26', 'x_27', 'x_28', 'x_29', 'x_30', 'x_31', 'x_32', 'x_33', 'x_34', 'x_35', 'x_36', 'x_37', 'x_38', 'x_39', 'x_40', 'x_41', 'x_42', 'x_43', 'x_44', 'x_45', 'x_46', 'x_47', 'x_48', 'x_49', 'x_50', 'x_51', 'x_52', 'x_53', 'x_54', 'x_55', 'x_56', 'x_57', 'x_58', 'x_59', 'x_60', 'x_61', 'x_62', 'x_63', 'x_64']
Class Feature: CLASS
Unused Features: ['key']

2. Define CSV Data Input Function


In [4]:
def parse_csv_row(csv_row):
    
    columns = tf.decode_csv(csv_row, record_defaults=HEADER_DEFAULTS)
    features = dict(zip(HEADER, columns))
    
    for column in UNUSED_FEATURE_NAMES:
        features.pop(column)

    target = features.pop(CLASS_FEATURE_NAME)

    return features, target

In [5]:
def csv_input_fn(files_name_pattern, mode=tf.estimator.ModeKeys.EVAL, 
                 skip_header_lines=0, 
                 num_epochs=None, 
                 batch_size=200):
    
    shuffle = True if mode == tf.estimator.ModeKeys.TRAIN else False
    
    print("")
    print("* data input_fn:")
    print("================")
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")
    
    file_names = tf.matching_files(files_name_pattern)

    dataset = data.TextLineDataset(filenames=file_names)
    dataset = dataset.skip(skip_header_lines)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
        
    num_threads = multiprocessing.cpu_count() if MULTI_THREADING else 1
    
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda csv_row: parse_csv_row(csv_row), num_parallel_calls=num_threads)
    
    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()
    
    features, target = iterator.get_next()

    return features, target

In [6]:
features, target = csv_input_fn(files_name_pattern="")
print("Feature read from CSV: {}".format(list(features.keys())))
print("Target read from CSV: {}".format(target))


* data input_fn:
================
Input file(s): 
Batch size: 200
Epoch Count: None
Mode: eval
Shuffle: False
================

Feature read from CSV: ['x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9', 'x_10', 'x_11', 'x_12', 'x_13', 'x_14', 'x_15', 'x_16', 'x_17', 'x_18', 'x_19', 'x_20', 'x_21', 'x_22', 'x_23', 'x_24', 'x_25', 'x_26', 'x_27', 'x_28', 'x_29', 'x_30', 'x_31', 'x_32', 'x_33', 'x_34', 'x_35', 'x_36', 'x_37', 'x_38', 'x_39', 'x_40', 'x_41', 'x_42', 'x_43', 'x_44', 'x_45', 'x_46', 'x_47', 'x_48', 'x_49', 'x_50', 'x_51', 'x_52', 'x_53', 'x_54', 'x_55', 'x_56', 'x_57', 'x_58', 'x_59', 'x_60', 'x_61', 'x_62', 'x_63', 'x_64']
Target read from CSV: Tensor("IteratorGetNext:64", shape=(?,), dtype=string)

3. Define Feature Columns

a. Load normalizarion params


In [7]:
df_params = pd.read_csv("data/params.csv", header=0, index_col=0)
len(df_params)
df_params['feature_name'] = FEATURE_NAMES
df_params.head()


Out[7]:
max mean min stdv feature_name
0 8.613757 -0.459068 -9.810630 2.931419 x_1
1 27.206817 0.452976 -40.614726 8.505586 x_2
2 3.127243 0.019996 -3.386526 1.014950 x_3
3 3.505720 -0.044230 -3.662128 0.992678 x_4
4 10.622091 0.912562 -10.656254 3.155885 x_5

b. Create normalized feature columns


In [8]:
def standard_scaler(x, mean, stdv):
    return (x-mean)/stdv

def maxmin_scaler(x, max_value, min_value):
    return (x-min_value)/(max_value-min_value)

def get_feature_columns():
    
    feature_columns = {}
    

#     feature_columns = {feature_name: tf.feature_column.numeric_column(feature_name)
#                        for feature_name in FEATURE_NAMES}

    for feature_name in FEATURE_NAMES:

        feature_max = df_params[df_params.feature_name == feature_name]['max'].values[0]
        feature_min = df_params[df_params.feature_name == feature_name]['min'].values[0]
        normalizer_fn = lambda x: maxmin_scaler(x, feature_max, feature_min)
        
        feature_columns[feature_name] = tf.feature_column.numeric_column(feature_name, 
                                                                         normalizer_fn=normalizer_fn
                                                                        )
 

    return feature_columns

print(get_feature_columns())


{'x_1': _NumericColumn(key='x_1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644048>), 'x_2': _NumericColumn(key='x_2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11862ae18>), 'x_3': _NumericColumn(key='x_3', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1160fe488>), 'x_4': _NumericColumn(key='x_4', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11862a598>), 'x_5': _NumericColumn(key='x_5', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11862ad90>), 'x_6': _NumericColumn(key='x_6', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653158>), 'x_7': _NumericColumn(key='x_7', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186531e0>), 'x_8': _NumericColumn(key='x_8', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186530d0>), 'x_9': _NumericColumn(key='x_9', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653048>), 'x_10': _NumericColumn(key='x_10', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653378>), 'x_11': _NumericColumn(key='x_11', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653400>), 'x_12': _NumericColumn(key='x_12', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653488>), 'x_13': _NumericColumn(key='x_13', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653510>), 'x_14': _NumericColumn(key='x_14', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653598>), 'x_15': _NumericColumn(key='x_15', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x10a7e6a60>), 'x_16': _NumericColumn(key='x_16', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118621400>), 'x_17': _NumericColumn(key='x_17', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644730>), 'x_18': _NumericColumn(key='x_18', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644950>), 'x_19': _NumericColumn(key='x_19', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186447b8>), 'x_20': _NumericColumn(key='x_20', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186442f0>), 'x_21': _NumericColumn(key='x_21', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644400>), 'x_22': _NumericColumn(key='x_22', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644488>), 'x_23': _NumericColumn(key='x_23', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644510>), 'x_24': _NumericColumn(key='x_24', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186449d8>), 'x_25': _NumericColumn(key='x_25', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644bf8>), 'x_26': _NumericColumn(key='x_26', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644c80>), 'x_27': _NumericColumn(key='x_27', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644d08>), 'x_28': _NumericColumn(key='x_28', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644d90>), 'x_29': _NumericColumn(key='x_29', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644e18>), 'x_30': _NumericColumn(key='x_30', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644ea0>), 'x_31': _NumericColumn(key='x_31', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644f28>), 'x_32': _NumericColumn(key='x_32', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118644a60>), 'x_33': _NumericColumn(key='x_33', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186440d0>), 'x_34': _NumericColumn(key='x_34', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186441e0>), 'x_35': _NumericColumn(key='x_35', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186536a8>), 'x_36': _NumericColumn(key='x_36', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653730>), 'x_37': _NumericColumn(key='x_37', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653620>), 'x_38': _NumericColumn(key='x_38', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186537b8>), 'x_39': _NumericColumn(key='x_39', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653840>), 'x_40': _NumericColumn(key='x_40', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186538c8>), 'x_41': _NumericColumn(key='x_41', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653950>), 'x_42': _NumericColumn(key='x_42', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x1186539d8>), 'x_43': _NumericColumn(key='x_43', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653a60>), 'x_44': _NumericColumn(key='x_44', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653ae8>), 'x_45': _NumericColumn(key='x_45', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653b70>), 'x_46': _NumericColumn(key='x_46', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653bf8>), 'x_47': _NumericColumn(key='x_47', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653c80>), 'x_48': _NumericColumn(key='x_48', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653d08>), 'x_49': _NumericColumn(key='x_49', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653d90>), 'x_50': _NumericColumn(key='x_50', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653e18>), 'x_51': _NumericColumn(key='x_51', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653f28>), 'x_52': _NumericColumn(key='x_52', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x118653ea0>), 'x_53': _NumericColumn(key='x_53', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f0d0>), 'x_54': _NumericColumn(key='x_54', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f048>), 'x_55': _NumericColumn(key='x_55', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f158>), 'x_56': _NumericColumn(key='x_56', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f1e0>), 'x_57': _NumericColumn(key='x_57', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f268>), 'x_58': _NumericColumn(key='x_58', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f2f0>), 'x_59': _NumericColumn(key='x_59', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f378>), 'x_60': _NumericColumn(key='x_60', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f400>), 'x_61': _NumericColumn(key='x_61', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f488>), 'x_62': _NumericColumn(key='x_62', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f510>), 'x_63': _NumericColumn(key='x_63', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f598>), 'x_64': _NumericColumn(key='x_64', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_feature_columns.<locals>.<lambda> at 0x11881f620>)}

4. Define Autoencoder Model Function


In [9]:
def autoencoder_model_fn(features, labels, mode, params):
    
    feature_columns = list(get_feature_columns().values())
    
    input_layer_size = len(feature_columns)
    
    encoder_hidden_units = params.encoder_hidden_units
    
    # decoder units are the reverse of the encoder units, without the middle layer (redundant)
    decoder_hidden_units = encoder_hidden_units.copy()  
    decoder_hidden_units.reverse()
    decoder_hidden_units.pop(0)
    
    output_layer_size = len(FEATURE_NAMES)
    
    he_initialiser = tf.contrib.layers.variance_scaling_initializer()
    l2_regulariser = tf.contrib.layers.l2_regularizer(scale=params.l2_reg)
    
  
    print("[{}]->{}-{}->[{}]".format(len(feature_columns)
                                     ,encoder_hidden_units
                                     ,decoder_hidden_units,
                                     output_layer_size))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    
    # input layer
    input_layer = tf.feature_column.input_layer(features=features, 
                                                feature_columns=feature_columns)
    
    # Adding Gaussian Noise to input layer
    noisy_input_layer = input_layer + (params.noise_level * tf.random_normal(tf.shape(input_layer)))
    
    # Dropout layer
    dropout_layer = tf.layers.dropout(inputs=noisy_input_layer, 
                                     rate=params.dropout_rate, 
                                     training=is_training)

#     # Dropout layer without Gaussian Nosing
#     dropout_layer = tf.layers.dropout(inputs=input_layer, 
#                                       rate=params.dropout_rate, 
#                                       training=is_training)

    # Encoder layers stack
    encoding_hidden_layers = tf.contrib.layers.stack(inputs= dropout_layer,
                                                     layer= tf.contrib.layers.fully_connected,
                                                     stack_args=encoder_hidden_units,
                                                     #weights_initializer = he_init,
                                                     weights_regularizer =l2_regulariser,
                                                     activation_fn = tf.nn.relu
                                                    )
    # Decoder layers stack
    decoding_hidden_layers = tf.contrib.layers.stack(inputs=encoding_hidden_layers,
                                                     layer=tf.contrib.layers.fully_connected,                
                                                     stack_args=decoder_hidden_units,
                                                     #weights_initializer = he_init,
                                                     weights_regularizer =l2_regulariser,
                                                     activation_fn = tf.nn.relu
                                                    )
    # Output (reconstructed) layer
    output_layer = tf.layers.dense(inputs=decoding_hidden_layers, 
                             units=output_layer_size, activation=None)
    
    # Encoding output (i.e., extracted features) reshaped
    encoding_output = tf.squeeze(encoding_hidden_layers)
    
    # Reconstruction output reshaped (for serving function)
    reconstruction_output =  tf.squeeze(tf.nn.sigmoid(output_layer))
    
    # Provide an estimator spec for `ModeKeys.PREDICT`.
    if mode == tf.estimator.ModeKeys.PREDICT:
        
        # Convert predicted_indices back into strings
        predictions = {
            'encoding': encoding_output,
            'reconstruction': reconstruction_output
        }
        export_outputs = {
            'predict': tf.estimator.export.PredictOutput(predictions)
        }
        
        # Provide an estimator spec for `ModeKeys.PREDICT` modes.
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)
    
    # Define loss based on reconstruction and regularization
    
#     reconstruction_loss = tf.losses.mean_squared_error(tf.squeeze(input_layer), reconstruction_output) 
#     loss = reconstruction_loss + tf.losses.get_regularization_loss()
    
    reconstruction_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=tf.squeeze(input_layer), logits=tf.squeeze(output_layer))
    loss = reconstruction_loss + tf.losses.get_regularization_loss()
                         
    # Create Optimiser
    optimizer = tf.train.AdamOptimizer(params.learning_rate)

    # Create training operation
    train_op = optimizer.minimize(
        loss=loss, global_step=tf.train.get_global_step())

    # Calculate root mean squared error as additional eval metric
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            tf.squeeze(input_layer), reconstruction_output)
    }
                                                     
    # Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
    estimator_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                loss=loss,
                                                train_op=train_op,
                                                eval_metric_ops=eval_metric_ops)
    return estimator_spec


def create_estimator(run_config, hparams):
    estimator = tf.estimator.Estimator(model_fn=autoencoder_model_fn, 
                                  params=hparams, 
                                  config=run_config)
    
    print("")
    print("Estimator Type: {}".format(type(estimator)))
    print("")

    return estimator

5. Run Experiment using Estimator Train_And_Evaluate

a. Set the parameters


In [10]:
TRAIN_SIZE = 2000
NUM_EPOCHS = 1000
BATCH_SIZE = 100
NUM_EVAL = 10

TOTAL_STEPS = (TRAIN_SIZE/BATCH_SIZE)*NUM_EPOCHS
CHECKPOINT_STEPS = int((TRAIN_SIZE/BATCH_SIZE) * (NUM_EPOCHS/NUM_EVAL))

hparams  = tf.contrib.training.HParams(
    num_epochs = NUM_EPOCHS,
    batch_size = BATCH_SIZE,
    encoder_hidden_units=[30,3],
    learning_rate = 0.01,
    l2_reg = 0.0001,
    noise_level = 0.0,
    max_steps = TOTAL_STEPS,
    dropout_rate = 0.05
)

model_dir = 'trained_models/{}'.format(MODEL_NAME)

run_config = tf.contrib.learn.RunConfig(
    save_checkpoints_steps=CHECKPOINT_STEPS,
    tf_random_seed=19830610,
    model_dir=model_dir
)

print(hparams)
print("Model Directory:", run_config.model_dir)
print("")
print("Dataset Size:", TRAIN_SIZE)
print("Batch Size:", BATCH_SIZE)
print("Steps per Epoch:",TRAIN_SIZE/BATCH_SIZE)
print("Total Steps:", TOTAL_STEPS)
print("Required Evaluation Steps:", NUM_EVAL) 
print("That is 1 evaluation step after each",NUM_EPOCHS/NUM_EVAL," epochs")
print("Save Checkpoint After",CHECKPOINT_STEPS,"steps")


[('batch_size', 100), ('dropout_rate', 0.05), ('encoder_hidden_units', [30, 3]), ('l2_reg', 0.0001), ('learning_rate', 0.01), ('max_steps', 20000.0), ('noise_level', 0.0), ('num_epochs', 1000)]
Model Directory: trained_models/auto-encoder-01

Dataset Size: 2000
Batch Size: 100
Steps per Epoch: 20.0
Total Steps: 20000.0
Required Evaluation Steps: 10
That is 1 evaluation step after each 100.0  epochs
Save Checkpoint After 2000 steps

b. Define TrainSpec and EvaluSpec


In [11]:
train_spec = tf.estimator.TrainSpec(
    input_fn = lambda: csv_input_fn(
        TRAIN_DATA_FILES_PATTERN,
        mode = tf.contrib.learn.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.batch_size
    ),
    max_steps=hparams.max_steps,
    hooks=None
)

eval_spec = tf.estimator.EvalSpec(
    input_fn = lambda: csv_input_fn(
        TRAIN_DATA_FILES_PATTERN,
        mode=tf.contrib.learn.ModeKeys.EVAL,
        num_epochs=1,
        batch_size=hparams.batch_size
    ),
#     exporters=[tf.estimator.LatestExporter(
#         name="encode",  # the name of the folder in which the model will be exported to under export
#         serving_input_receiver_fn=csv_serving_input_fn,
#         exports_to_keep=1,
#         as_text=True)],
    steps=None,
    hooks=None
)

d. Run Experiment via train_and_evaluate


In [12]:
if not RESUME_TRAINING:
    print("Removing previous artifacts...")
    shutil.rmtree(model_dir, ignore_errors=True)
else:
    print("Resuming training...") 

    
tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow() 
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

estimator = create_estimator(run_config, hparams)

tf.estimator.train_and_evaluate(
    estimator=estimator,
    train_spec=train_spec, 
    eval_spec=eval_spec
)

time_end = datetime.utcnow() 
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))


Removing previous artifacts...
Experiment started at 21:41:40
.......................................
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x118884668>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 2000, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'trained_models/auto-encoder-01'}

Estimator Type: <class 'tensorflow.python.estimator.estimator.Estimator'>

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.

* data input_fn:
================
Input file(s): data/data-*.csv
Batch size: 100
Epoch Count: 1000
Mode: train
Shuffle: True
================

[64]->[30, 3]-[30]->[64]
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:loss = 0.695666, step = 1
INFO:tensorflow:global_step/sec: 103.458
INFO:tensorflow:loss = 0.613976, step = 101 (0.968 sec)
INFO:tensorflow:global_step/sec: 142.531
INFO:tensorflow:loss = -22.5053, step = 201 (0.704 sec)
INFO:tensorflow:global_step/sec: 143.048
INFO:tensorflow:loss = -511.17, step = 301 (0.696 sec)
INFO:tensorflow:global_step/sec: 141.398
INFO:tensorflow:loss = -2646.65, step = 401 (0.707 sec)
INFO:tensorflow:global_step/sec: 144.899
INFO:tensorflow:loss = -9294.64, step = 501 (0.690 sec)
INFO:tensorflow:global_step/sec: 142.906
INFO:tensorflow:loss = -37804.6, step = 601 (0.700 sec)
INFO:tensorflow:global_step/sec: 141.667
INFO:tensorflow:loss = -40352.9, step = 701 (0.706 sec)
INFO:tensorflow:global_step/sec: 143.845
INFO:tensorflow:loss = -61870.8, step = 801 (0.695 sec)
INFO:tensorflow:global_step/sec: 143.765
INFO:tensorflow:loss = -186910.0, step = 901 (0.696 sec)
INFO:tensorflow:global_step/sec: 141.418
INFO:tensorflow:loss = -195133.0, step = 1001 (0.707 sec)
INFO:tensorflow:global_step/sec: 142.911
INFO:tensorflow:loss = -259042.0, step = 1101 (0.700 sec)
INFO:tensorflow:global_step/sec: 141.555
INFO:tensorflow:loss = -698791.0, step = 1201 (0.706 sec)
INFO:tensorflow:global_step/sec: 144.079
INFO:tensorflow:loss = -152529.0, step = 1301 (0.694 sec)
INFO:tensorflow:global_step/sec: 141.807
INFO:tensorflow:loss = -797219.0, step = 1401 (0.705 sec)
INFO:tensorflow:global_step/sec: 142.896
INFO:tensorflow:loss = -924645.0, step = 1501 (0.700 sec)
INFO:tensorflow:global_step/sec: 141.431
INFO:tensorflow:loss = -1.39466e+06, step = 1601 (0.707 sec)
INFO:tensorflow:global_step/sec: 144.005
INFO:tensorflow:loss = -2.11370e+06, step = 1701 (0.695 sec)
INFO:tensorflow:global_step/sec: 142.571
INFO:tensorflow:loss = -266958.0, step = 1801 (0.701 sec)
INFO:tensorflow:global_step/sec: 142.206
INFO:tensorflow:loss = -3.3687e+06, step = 1901 (0.703 sec)
INFO:tensorflow:Saving checkpoints for 2001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 51.2284
INFO:tensorflow:loss = -4.90631e+06, step = 2001 (1.953 sec)
INFO:tensorflow:global_step/sec: 139.123
INFO:tensorflow:loss = -4.14176e+06, step = 2101 (0.718 sec)
INFO:tensorflow:global_step/sec: 140.783
INFO:tensorflow:loss = -3.68804e+06, step = 2201 (0.710 sec)
INFO:tensorflow:global_step/sec: 141.944
INFO:tensorflow:loss = -5.45786e+06, step = 2301 (0.704 sec)
INFO:tensorflow:global_step/sec: 141.337
INFO:tensorflow:loss = -5.96973e+06, step = 2401 (0.708 sec)
INFO:tensorflow:global_step/sec: 139.018
INFO:tensorflow:loss = -3.85333e+06, step = 2501 (0.719 sec)
INFO:tensorflow:global_step/sec: 140.809
INFO:tensorflow:loss = -8.90947e+06, step = 2601 (0.710 sec)
INFO:tensorflow:global_step/sec: 142.953
INFO:tensorflow:loss = -1.36037e+07, step = 2701 (0.700 sec)
INFO:tensorflow:global_step/sec: 139.948
INFO:tensorflow:loss = -1.24019e+07, step = 2801 (0.715 sec)
INFO:tensorflow:global_step/sec: 140.424
INFO:tensorflow:loss = -1.15362e+07, step = 2901 (0.713 sec)
INFO:tensorflow:global_step/sec: 140.319
INFO:tensorflow:loss = -1.05245e+07, step = 3001 (0.712 sec)
INFO:tensorflow:global_step/sec: 139.685
INFO:tensorflow:loss = -2.14483e+07, step = 3101 (0.716 sec)
INFO:tensorflow:global_step/sec: 141.117
INFO:tensorflow:loss = -1.02466e+07, step = 3201 (0.709 sec)
INFO:tensorflow:global_step/sec: 141.113
INFO:tensorflow:loss = -2.53656e+07, step = 3301 (0.709 sec)
INFO:tensorflow:global_step/sec: 140.567
INFO:tensorflow:loss = -3.09483e+07, step = 3401 (0.712 sec)
INFO:tensorflow:global_step/sec: 142.008
INFO:tensorflow:loss = -1.05858e+07, step = 3501 (0.704 sec)
INFO:tensorflow:global_step/sec: 140.854
INFO:tensorflow:loss = -4.49928e+07, step = 3601 (0.710 sec)
INFO:tensorflow:global_step/sec: 141.354
INFO:tensorflow:loss = -2.81104e+07, step = 3701 (0.708 sec)
INFO:tensorflow:global_step/sec: 134.908
INFO:tensorflow:loss = -4.05992e+07, step = 3801 (0.741 sec)
INFO:tensorflow:global_step/sec: 143.242
INFO:tensorflow:loss = -3.58284e+07, step = 3901 (0.698 sec)
INFO:tensorflow:Saving checkpoints for 4001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 50.5982
INFO:tensorflow:loss = -4.65278e+07, step = 4001 (1.977 sec)
INFO:tensorflow:global_step/sec: 139.943
INFO:tensorflow:loss = -4.53966e+07, step = 4101 (0.714 sec)
INFO:tensorflow:global_step/sec: 142.26
INFO:tensorflow:loss = -1.89346e+07, step = 4201 (0.703 sec)
INFO:tensorflow:global_step/sec: 138.033
INFO:tensorflow:loss = -3.31856e+07, step = 4301 (0.724 sec)
INFO:tensorflow:global_step/sec: 139.574
INFO:tensorflow:loss = -5.90151e+07, step = 4401 (0.717 sec)
INFO:tensorflow:global_step/sec: 142.614
INFO:tensorflow:loss = -5.1197e+07, step = 4501 (0.701 sec)
INFO:tensorflow:global_step/sec: 141.431
INFO:tensorflow:loss = -7.71718e+07, step = 4601 (0.707 sec)
INFO:tensorflow:global_step/sec: 139.259
INFO:tensorflow:loss = -6.23485e+07, step = 4701 (0.718 sec)
INFO:tensorflow:global_step/sec: 139.791
INFO:tensorflow:loss = -2.07544e+07, step = 4801 (0.715 sec)
INFO:tensorflow:global_step/sec: 140.503
INFO:tensorflow:loss = -9.322e+07, step = 4901 (0.712 sec)
INFO:tensorflow:global_step/sec: 140.675
INFO:tensorflow:loss = -8.12346e+07, step = 5001 (0.711 sec)
INFO:tensorflow:global_step/sec: 136.884
INFO:tensorflow:loss = -1.45652e+08, step = 5101 (0.731 sec)
INFO:tensorflow:global_step/sec: 141.842
INFO:tensorflow:loss = -8.65107e+07, step = 5201 (0.705 sec)
INFO:tensorflow:global_step/sec: 142.196
INFO:tensorflow:loss = -9.71459e+07, step = 5301 (0.703 sec)
INFO:tensorflow:global_step/sec: 140.692
INFO:tensorflow:loss = -1.19848e+08, step = 5401 (0.711 sec)
INFO:tensorflow:global_step/sec: 144.001
INFO:tensorflow:loss = -2.64142e+07, step = 5501 (0.694 sec)
INFO:tensorflow:global_step/sec: 140.862
INFO:tensorflow:loss = -1.40026e+08, step = 5601 (0.710 sec)
INFO:tensorflow:global_step/sec: 140.472
INFO:tensorflow:loss = -1.25818e+08, step = 5701 (0.712 sec)
INFO:tensorflow:global_step/sec: 140.194
INFO:tensorflow:loss = -1.80938e+08, step = 5801 (0.713 sec)
INFO:tensorflow:global_step/sec: 141.931
INFO:tensorflow:loss = -1.00386e+08, step = 5901 (0.705 sec)
INFO:tensorflow:Saving checkpoints for 6001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 53.9011
INFO:tensorflow:loss = -1.15516e+08, step = 6001 (1.855 sec)
INFO:tensorflow:global_step/sec: 138.326
INFO:tensorflow:loss = -2.67414e+08, step = 6101 (0.723 sec)
INFO:tensorflow:global_step/sec: 138.914
INFO:tensorflow:loss = -1.91427e+08, step = 6201 (0.720 sec)
INFO:tensorflow:global_step/sec: 139.247
INFO:tensorflow:loss = -1.33759e+08, step = 6301 (0.718 sec)
INFO:tensorflow:global_step/sec: 139.089
INFO:tensorflow:loss = -2.12749e+08, step = 6401 (0.721 sec)
INFO:tensorflow:global_step/sec: 139.949
INFO:tensorflow:loss = -1.69334e+08, step = 6501 (0.713 sec)
INFO:tensorflow:global_step/sec: 141.121
INFO:tensorflow:loss = -1.68339e+08, step = 6601 (0.709 sec)
INFO:tensorflow:global_step/sec: 141.021
INFO:tensorflow:loss = -1.3578e+08, step = 6701 (0.709 sec)
INFO:tensorflow:global_step/sec: 142.452
INFO:tensorflow:loss = -2.25908e+08, step = 6801 (0.702 sec)
INFO:tensorflow:global_step/sec: 143.303
INFO:tensorflow:loss = -3.92238e+08, step = 6901 (0.698 sec)
INFO:tensorflow:global_step/sec: 143.688
INFO:tensorflow:loss = -8.33598e+07, step = 7001 (0.697 sec)
INFO:tensorflow:global_step/sec: 144.553
INFO:tensorflow:loss = -1.61669e+08, step = 7101 (0.691 sec)
INFO:tensorflow:global_step/sec: 140.124
INFO:tensorflow:loss = -1.98041e+08, step = 7201 (0.713 sec)
INFO:tensorflow:global_step/sec: 142.028
INFO:tensorflow:loss = -4.24831e+08, step = 7301 (0.704 sec)
INFO:tensorflow:global_step/sec: 140.358
INFO:tensorflow:loss = -4.38497e+08, step = 7401 (0.712 sec)
INFO:tensorflow:global_step/sec: 141.39
INFO:tensorflow:loss = -1.45828e+08, step = 7501 (0.707 sec)
INFO:tensorflow:global_step/sec: 144.403
INFO:tensorflow:loss = -3.24311e+08, step = 7601 (0.693 sec)
INFO:tensorflow:global_step/sec: 143.932
INFO:tensorflow:loss = -2.01757e+08, step = 7701 (0.695 sec)
INFO:tensorflow:global_step/sec: 142.946
INFO:tensorflow:loss = -1.7144e+08, step = 7801 (0.700 sec)
INFO:tensorflow:global_step/sec: 143.218
INFO:tensorflow:loss = -3.76991e+08, step = 7901 (0.698 sec)
INFO:tensorflow:Saving checkpoints for 8001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 49.4375
INFO:tensorflow:loss = -6.47176e+08, step = 8001 (2.023 sec)
INFO:tensorflow:global_step/sec: 139.759
INFO:tensorflow:loss = -3.26687e+08, step = 8101 (0.715 sec)
INFO:tensorflow:global_step/sec: 143.492
INFO:tensorflow:loss = -2.98137e+08, step = 8201 (0.697 sec)
INFO:tensorflow:global_step/sec: 144.098
INFO:tensorflow:loss = -3.9036e+08, step = 8301 (0.694 sec)
INFO:tensorflow:global_step/sec: 140.788
INFO:tensorflow:loss = -3.71059e+08, step = 8401 (0.710 sec)
INFO:tensorflow:global_step/sec: 143.07
INFO:tensorflow:loss = -4.52222e+08, step = 8501 (0.699 sec)
INFO:tensorflow:global_step/sec: 142.553
INFO:tensorflow:loss = -4.57539e+08, step = 8601 (0.701 sec)
INFO:tensorflow:global_step/sec: 143.504
INFO:tensorflow:loss = -3.4115e+08, step = 8701 (0.697 sec)
INFO:tensorflow:global_step/sec: 143.283
INFO:tensorflow:loss = -5.46016e+08, step = 8801 (0.698 sec)
INFO:tensorflow:global_step/sec: 142.628
INFO:tensorflow:loss = -4.47694e+08, step = 8901 (0.701 sec)
INFO:tensorflow:global_step/sec: 144.55
INFO:tensorflow:loss = -4.7832e+08, step = 9001 (0.692 sec)
INFO:tensorflow:global_step/sec: 142.17
INFO:tensorflow:loss = -7.28332e+08, step = 9101 (0.703 sec)
INFO:tensorflow:global_step/sec: 141.513
INFO:tensorflow:loss = -2.86629e+08, step = 9201 (0.707 sec)
INFO:tensorflow:global_step/sec: 141.205
INFO:tensorflow:loss = -7.04112e+08, step = 9301 (0.708 sec)
INFO:tensorflow:global_step/sec: 142.703
INFO:tensorflow:loss = -1.10194e+08, step = 9401 (0.701 sec)
INFO:tensorflow:global_step/sec: 142.293
INFO:tensorflow:loss = -6.21675e+08, step = 9501 (0.703 sec)
INFO:tensorflow:global_step/sec: 139.997
INFO:tensorflow:loss = -7.37756e+08, step = 9601 (0.714 sec)
INFO:tensorflow:global_step/sec: 141.41
INFO:tensorflow:loss = -8.66114e+08, step = 9701 (0.708 sec)
INFO:tensorflow:global_step/sec: 142.425
INFO:tensorflow:loss = -6.84963e+08, step = 9801 (0.702 sec)
INFO:tensorflow:global_step/sec: 141.609
INFO:tensorflow:loss = -1.18232e+09, step = 9901 (0.706 sec)
INFO:tensorflow:Saving checkpoints for 10001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 54.4047
INFO:tensorflow:loss = -7.97381e+08, step = 10001 (1.838 sec)
INFO:tensorflow:global_step/sec: 141.017
INFO:tensorflow:loss = -3.21004e+08, step = 10101 (0.709 sec)
INFO:tensorflow:global_step/sec: 139.348
INFO:tensorflow:loss = -1.29598e+09, step = 10201 (0.718 sec)
INFO:tensorflow:global_step/sec: 139.447
INFO:tensorflow:loss = -5.36019e+08, step = 10301 (0.717 sec)
INFO:tensorflow:global_step/sec: 139.2
INFO:tensorflow:loss = -9.41272e+08, step = 10401 (0.718 sec)
INFO:tensorflow:global_step/sec: 143.138
INFO:tensorflow:loss = -1.09451e+09, step = 10501 (0.699 sec)
INFO:tensorflow:global_step/sec: 140.538
INFO:tensorflow:loss = -1.30059e+09, step = 10601 (0.711 sec)
INFO:tensorflow:global_step/sec: 142.967
INFO:tensorflow:loss = -1.1937e+09, step = 10701 (0.700 sec)
INFO:tensorflow:global_step/sec: 142.494
INFO:tensorflow:loss = -9.23008e+08, step = 10801 (0.702 sec)
INFO:tensorflow:global_step/sec: 139.96
INFO:tensorflow:loss = -1.17736e+09, step = 10901 (0.714 sec)
INFO:tensorflow:global_step/sec: 140.62
INFO:tensorflow:loss = -6.67815e+08, step = 11001 (0.711 sec)
INFO:tensorflow:global_step/sec: 141.765
INFO:tensorflow:loss = -5.84613e+08, step = 11101 (0.705 sec)
INFO:tensorflow:global_step/sec: 142.42
INFO:tensorflow:loss = -1.23956e+09, step = 11201 (0.702 sec)
INFO:tensorflow:global_step/sec: 141.821
INFO:tensorflow:loss = -1.12848e+09, step = 11301 (0.705 sec)
INFO:tensorflow:global_step/sec: 141.498
INFO:tensorflow:loss = -7.16786e+08, step = 11401 (0.707 sec)
INFO:tensorflow:global_step/sec: 143.064
INFO:tensorflow:loss = -1.20481e+09, step = 11501 (0.700 sec)
INFO:tensorflow:global_step/sec: 141.059
INFO:tensorflow:loss = -8.03406e+08, step = 11601 (0.708 sec)
INFO:tensorflow:global_step/sec: 140.95
INFO:tensorflow:loss = -1.42086e+09, step = 11701 (0.709 sec)
INFO:tensorflow:global_step/sec: 139.647
INFO:tensorflow:loss = -2.38389e+09, step = 11801 (0.716 sec)
INFO:tensorflow:global_step/sec: 141.696
INFO:tensorflow:loss = -5.81655e+08, step = 11901 (0.706 sec)
INFO:tensorflow:Saving checkpoints for 12001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 50.8308
INFO:tensorflow:loss = -1.37286e+09, step = 12001 (1.967 sec)
INFO:tensorflow:global_step/sec: 139.761
INFO:tensorflow:loss = -1.86609e+09, step = 12101 (0.715 sec)
INFO:tensorflow:global_step/sec: 142.857
INFO:tensorflow:loss = -2.18507e+09, step = 12201 (0.700 sec)
INFO:tensorflow:global_step/sec: 132.305
INFO:tensorflow:loss = -8.55965e+08, step = 12301 (0.756 sec)
INFO:tensorflow:global_step/sec: 143.367
INFO:tensorflow:loss = -1.48408e+09, step = 12401 (0.697 sec)
INFO:tensorflow:global_step/sec: 140.488
INFO:tensorflow:loss = -1.25136e+09, step = 12501 (0.712 sec)
INFO:tensorflow:global_step/sec: 136.879
INFO:tensorflow:loss = -1.80592e+09, step = 12601 (0.731 sec)
INFO:tensorflow:global_step/sec: 139.92
INFO:tensorflow:loss = -1.54763e+09, step = 12701 (0.715 sec)
INFO:tensorflow:global_step/sec: 145.003
INFO:tensorflow:loss = -2.55634e+09, step = 12801 (0.689 sec)
INFO:tensorflow:global_step/sec: 144.699
INFO:tensorflow:loss = -1.27625e+09, step = 12901 (0.691 sec)
INFO:tensorflow:global_step/sec: 141.927
INFO:tensorflow:loss = -1.40914e+09, step = 13001 (0.705 sec)
INFO:tensorflow:global_step/sec: 143.371
INFO:tensorflow:loss = -1.86804e+09, step = 13101 (0.698 sec)
INFO:tensorflow:global_step/sec: 142.64
INFO:tensorflow:loss = -2.00721e+09, step = 13201 (0.701 sec)
INFO:tensorflow:global_step/sec: 145.159
INFO:tensorflow:loss = -9.94468e+08, step = 13301 (0.689 sec)
INFO:tensorflow:global_step/sec: 144.428
INFO:tensorflow:loss = -8.48483e+08, step = 13401 (0.693 sec)
INFO:tensorflow:global_step/sec: 143.881
INFO:tensorflow:loss = -1.78787e+09, step = 13501 (0.695 sec)
INFO:tensorflow:global_step/sec: 138.529
INFO:tensorflow:loss = -1.9492e+09, step = 13601 (0.722 sec)
INFO:tensorflow:global_step/sec: 142.504
INFO:tensorflow:loss = -2.28215e+09, step = 13701 (0.702 sec)
INFO:tensorflow:global_step/sec: 145.718
INFO:tensorflow:loss = -2.40443e+09, step = 13801 (0.686 sec)
INFO:tensorflow:global_step/sec: 142.999
INFO:tensorflow:loss = -2.74865e+09, step = 13901 (0.699 sec)
INFO:tensorflow:Saving checkpoints for 14001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 49.8778
INFO:tensorflow:loss = -1.00535e+09, step = 14001 (2.005 sec)
INFO:tensorflow:global_step/sec: 137.053
INFO:tensorflow:loss = -2.83681e+09, step = 14101 (0.730 sec)
INFO:tensorflow:global_step/sec: 140.339
INFO:tensorflow:loss = -2.44134e+09, step = 14201 (0.713 sec)
INFO:tensorflow:global_step/sec: 139.074
INFO:tensorflow:loss = -2.29439e+09, step = 14301 (0.719 sec)
INFO:tensorflow:global_step/sec: 143.811
INFO:tensorflow:loss = -1.3227e+09, step = 14401 (0.696 sec)
INFO:tensorflow:global_step/sec: 137.196
INFO:tensorflow:loss = -3.67736e+09, step = 14501 (0.729 sec)
INFO:tensorflow:global_step/sec: 142.174
INFO:tensorflow:loss = -2.74288e+09, step = 14601 (0.703 sec)
INFO:tensorflow:global_step/sec: 140.236
INFO:tensorflow:loss = -3.35358e+09, step = 14701 (0.713 sec)
INFO:tensorflow:global_step/sec: 143.86
INFO:tensorflow:loss = -2.70737e+09, step = 14801 (0.695 sec)
INFO:tensorflow:global_step/sec: 142.46
INFO:tensorflow:loss = -1.38112e+09, step = 14901 (0.702 sec)
INFO:tensorflow:global_step/sec: 142.884
INFO:tensorflow:loss = -1.17051e+09, step = 15001 (0.704 sec)
INFO:tensorflow:global_step/sec: 140.569
INFO:tensorflow:loss = -1.82866e+09, step = 15101 (0.707 sec)
INFO:tensorflow:global_step/sec: 143.509
INFO:tensorflow:loss = -2.87066e+09, step = 15201 (0.697 sec)
INFO:tensorflow:global_step/sec: 142.725
INFO:tensorflow:loss = -2.77202e+09, step = 15301 (0.701 sec)
INFO:tensorflow:global_step/sec: 141.248
INFO:tensorflow:loss = -2.12348e+09, step = 15401 (0.708 sec)
INFO:tensorflow:global_step/sec: 142.864
INFO:tensorflow:loss = -3.26434e+09, step = 15501 (0.700 sec)
INFO:tensorflow:global_step/sec: 142.763
INFO:tensorflow:loss = -5.66948e+08, step = 15601 (0.700 sec)
INFO:tensorflow:global_step/sec: 142.202
INFO:tensorflow:loss = -2.73008e+09, step = 15701 (0.703 sec)
INFO:tensorflow:global_step/sec: 139.945
INFO:tensorflow:loss = -6.06809e+09, step = 15801 (0.714 sec)
INFO:tensorflow:global_step/sec: 142.821
INFO:tensorflow:loss = -2.64382e+09, step = 15901 (0.700 sec)
INFO:tensorflow:Saving checkpoints for 16001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 54.2193
INFO:tensorflow:loss = -5.77576e+09, step = 16001 (1.844 sec)
INFO:tensorflow:global_step/sec: 141.279
INFO:tensorflow:loss = -5.29185e+09, step = 16101 (0.708 sec)
INFO:tensorflow:global_step/sec: 113.344
INFO:tensorflow:loss = -3.17257e+09, step = 16201 (0.883 sec)
INFO:tensorflow:global_step/sec: 108.11
INFO:tensorflow:loss = -1.95277e+09, step = 16301 (0.924 sec)
INFO:tensorflow:global_step/sec: 110.429
INFO:tensorflow:loss = -4.6037e+09, step = 16401 (0.905 sec)
INFO:tensorflow:global_step/sec: 109.631
INFO:tensorflow:loss = -5.55385e+09, step = 16501 (0.913 sec)
INFO:tensorflow:global_step/sec: 107.602
INFO:tensorflow:loss = -4.05116e+09, step = 16601 (0.929 sec)
INFO:tensorflow:global_step/sec: 135.86
INFO:tensorflow:loss = -7.90738e+09, step = 16701 (0.736 sec)
INFO:tensorflow:global_step/sec: 137.79
INFO:tensorflow:loss = -4.25153e+09, step = 16801 (0.726 sec)
INFO:tensorflow:global_step/sec: 143.779
INFO:tensorflow:loss = -4.12346e+09, step = 16901 (0.695 sec)
INFO:tensorflow:global_step/sec: 146.203
INFO:tensorflow:loss = -2.68744e+09, step = 17001 (0.684 sec)
INFO:tensorflow:global_step/sec: 143.446
INFO:tensorflow:loss = -5.88048e+09, step = 17101 (0.697 sec)
INFO:tensorflow:global_step/sec: 142.369
INFO:tensorflow:loss = -4.73647e+09, step = 17201 (0.702 sec)
INFO:tensorflow:global_step/sec: 144.753
INFO:tensorflow:loss = -6.54027e+09, step = 17301 (0.691 sec)
INFO:tensorflow:global_step/sec: 143.372
INFO:tensorflow:loss = -6.79307e+09, step = 17401 (0.698 sec)
INFO:tensorflow:global_step/sec: 145.415
INFO:tensorflow:loss = -2.57023e+09, step = 17501 (0.687 sec)
INFO:tensorflow:global_step/sec: 145.504
INFO:tensorflow:loss = -4.35197e+09, step = 17601 (0.687 sec)
INFO:tensorflow:global_step/sec: 144.309
INFO:tensorflow:loss = -4.70664e+09, step = 17701 (0.693 sec)
INFO:tensorflow:global_step/sec: 144.048
INFO:tensorflow:loss = -3.88805e+09, step = 17801 (0.694 sec)
INFO:tensorflow:global_step/sec: 144.356
INFO:tensorflow:loss = -4.40114e+09, step = 17901 (0.693 sec)
INFO:tensorflow:Saving checkpoints for 18001 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:global_step/sec: 49.9686
INFO:tensorflow:loss = -6.56144e+09, step = 18001 (2.002 sec)
INFO:tensorflow:global_step/sec: 139.112
INFO:tensorflow:loss = -5.09428e+09, step = 18101 (0.718 sec)
INFO:tensorflow:global_step/sec: 142.348
INFO:tensorflow:loss = -7.19485e+09, step = 18201 (0.703 sec)
INFO:tensorflow:global_step/sec: 138.861
INFO:tensorflow:loss = -2.58019e+09, step = 18301 (0.720 sec)
INFO:tensorflow:global_step/sec: 142.223
INFO:tensorflow:loss = -5.15616e+09, step = 18401 (0.703 sec)
INFO:tensorflow:global_step/sec: 143.033
INFO:tensorflow:loss = -4.53271e+09, step = 18501 (0.699 sec)
INFO:tensorflow:global_step/sec: 140.961
INFO:tensorflow:loss = -5.32002e+09, step = 18601 (0.709 sec)
INFO:tensorflow:global_step/sec: 140.56
INFO:tensorflow:loss = -5.09997e+09, step = 18701 (0.711 sec)
INFO:tensorflow:global_step/sec: 139.721
INFO:tensorflow:loss = -5.42638e+09, step = 18801 (0.716 sec)
INFO:tensorflow:global_step/sec: 139.63
INFO:tensorflow:loss = -8.11847e+09, step = 18901 (0.716 sec)
INFO:tensorflow:global_step/sec: 142.659
INFO:tensorflow:loss = -3.77758e+09, step = 19001 (0.701 sec)
INFO:tensorflow:global_step/sec: 142.096
INFO:tensorflow:loss = -4.1266e+09, step = 19101 (0.704 sec)
INFO:tensorflow:global_step/sec: 140.746
INFO:tensorflow:loss = -7.60534e+08, step = 19201 (0.711 sec)
INFO:tensorflow:global_step/sec: 141.724
INFO:tensorflow:loss = -5.60527e+09, step = 19301 (0.705 sec)
INFO:tensorflow:global_step/sec: 140.679
INFO:tensorflow:loss = -4.99277e+09, step = 19401 (0.711 sec)
INFO:tensorflow:global_step/sec: 140.131
INFO:tensorflow:loss = -7.80448e+09, step = 19501 (0.714 sec)
INFO:tensorflow:global_step/sec: 139.591
INFO:tensorflow:loss = -9.65823e+09, step = 19601 (0.717 sec)
INFO:tensorflow:global_step/sec: 141.45
INFO:tensorflow:loss = -7.32331e+09, step = 19701 (0.707 sec)
INFO:tensorflow:global_step/sec: 139.322
INFO:tensorflow:loss = -3.53409e+09, step = 19801 (0.718 sec)
INFO:tensorflow:global_step/sec: 140.21
INFO:tensorflow:loss = -9.99442e+09, step = 19901 (0.713 sec)
INFO:tensorflow:Saving checkpoints for 20000 into trained_models/auto-encoder-01/model.ckpt.
INFO:tensorflow:Loss for final step: -1.31205e+10.

* data input_fn:
================
Input file(s): data/data-*.csv
Batch size: 100
Epoch Count: 1
Mode: eval
Shuffle: False
================

[64]->[30, 3]-[30]->[64]
INFO:tensorflow:Starting evaluation at 2017-11-22-21:44:26
INFO:tensorflow:Restoring parameters from trained_models/auto-encoder-01/model.ckpt-20000
INFO:tensorflow:Finished evaluation at 2017-11-22-21:44:27
INFO:tensorflow:Saving dict for global step 20000: global_step = 20000, loss = -7.14748e+09, rmse = 0.360845
.......................................
Experiment finished at 21:44:29

Experiment elapsed time: 169.505653 seconds

6. Use the trained model to encode data (prediction)


In [13]:
import itertools

DATA_SIZE = 2000

input_fn = lambda: csv_input_fn(
    TRAIN_DATA_FILES_PATTERN,
    mode=tf.contrib.learn.ModeKeys.INFER,
    num_epochs=1,
    batch_size=500
)

estimator = create_estimator(run_config, hparams)

predictions = estimator.predict(input_fn=input_fn)
predictions = itertools.islice(predictions, DATA_SIZE)
predictions = list(map(lambda item: list(item["encoding"]), predictions))

print(predictions[:5])


INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x118884668>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 2000, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'trained_models/auto-encoder-01'}

Estimator Type: <class 'tensorflow.python.estimator.estimator.Estimator'>


* data input_fn:
================
Input file(s): data/data-*.csv
Batch size: 500
Epoch Count: 1
Mode: infer
Shuffle: False
================

WARNING:tensorflow:Input graph does not contain a QueueRunner. That means predict yields forever. This is probably a mistake.
[64]->[30, 3]-[30]->[64]
INFO:tensorflow:Restoring parameters from trained_models/auto-encoder-01/model.ckpt-20000
[[1218043.8, 2438282.0, 1206508.6], [5877218.5, 0.0, 5859628.0], [459047.19, 2725077.8, 449051.16], [0.0, 4565467.5, 0.0], [0.0, 4157996.5, 0.0]]

Visualise Encoded Data


In [14]:
y = pd.read_csv("data/data-01.csv", header=None, index_col=0)[65]

data_reduced = pd.DataFrame(predictions, columns=['c1','c2','c3'])
data_reduced['class'] = y
data_reduced.head()


Out[14]:
c1 c2 c3 class
0 1.218044e+06 2438282.00 1.206509e+06 2
1 5.877218e+06 0.00 5.859628e+06 0
2 4.590472e+05 2725077.75 4.490512e+05 0
3 0.000000e+00 4565467.50 0.000000e+00 2
4 0.000000e+00 4157996.50 0.000000e+00 2

In [28]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs=data_reduced.c2/1000000, ys=data_reduced.c3/1000000, zs=data_reduced.c1/1000000, c=data_reduced['class'], marker='o')
plt.show()


Notes:

  1. You can effectively implement a (linear) PCA by having only one hidden layer with no activation function

  2. To improve the efficiency of training the model, the weights of the encoder and decoder layers can be tied (i.e., have the same values)


In [ ]: